*Written by Luo Mi, date unkonwn.
*Audited by David Zhang, 6/14/16
*Audited by Jason Premo, 6/16/16
*Largely rewritten by Jason Premo, 7/2016
*(Nearly every comment is by Jason, even those not labeled as such.)

*Generates the balance sheet estimates in Townsend/results/balance_sheet.do.
*These estimates are used in the SST paper.

*File: /shared/sdsjxs/Townsend/scripts/scf_clean.do

clear all
set more off
set maxvar 20000
cap log close
gl path1 = "/shared/sdsjxs/Townsend/rawdata/scf"
gl path2 = "/shared/sdsjxs/Townsend/workingdata"
gl path3 = "/shared/sdsjxs/Townsend/results"
gl path4 = "/shared/sdsjxs/Townsend/graphics"

gl do_path = "/shared/sdsjxs/Townsend/scripts"
gl log_path = "/shared/sdsjxs/Townsend/logfiles"

*log using "$log_path/scf_means.log", replace 


*A program that converts variables which may be reported at different frequencies
*into their appropriate annualized amounts
cap program drop convert
program define convert
	args var freq_var
	replace `var' = `var'*365 if `freq_var' == 1
	replace `var' = `var'*(365/7) if `freq_var' == 2
	replace `var' = `var'*(365/14) if `freq_var' == 3
	replace `var' = `var'*(12) if `freq_var' == 4
	replace `var' = `var'*4 if `freq_var' == 5
	replace `var' = `var'*2 if `freq_var' == 11
	replace `var' = `var'*6 if `freq_var' == 12
	replace `var' = `var'*24 if `freq_var' == 31
	replace `var' = 0 if `freq_var' == -1
	replace `var' = . if `freq_var' == -7
end

* Creates a variable from a list of SCF variables. It turned out to not be
* quite as useful as expected because of weird idiosyncracies in the data.
cap program drop createvar
program define createvar
	syntax varlist, GENerate(string)
	local i = 0
	foreach variable in `varlist' {
		local length = length("`variable'") - 1
		gen double `generate'_`i' = `variable'
		replace `generate'_`i' = 0 if `generate'_`i' == -1
		replace `generate'_`i' = . if `generate'_`i' < -1
		local num = substr("`variable'", 2, `length')
		local num = `num'+1
		convert `generate'_`i' "X`num'"
		local ++i
		}
	egen double `generate' = rowtotal(`generate'*)
	drop `generate'_*
end


*Basically do all the collapse formatting for me (and you!)
*The remove argument allows you to subtract out unnecessary variables
* to avoid double-counting (or counting weights). income is a yes or no
* argument added at the last minute that allows me to create the censored income.
cap program drop output_all
program define output_all
	args remove sheetname income
	
	drop X* Y*
	qui describe, varlist
	egen double all = rowtotal(`r(varlist)')
	replace all = all `remove'

	if "`income'" == "yes" {
		gen all_censored = all if all <= 200000
		}
	
	qui describe, varlist
	local meanlist = ""
	local medianlist = ""

	*Automatically creates a set of conditions for the collapse.
	*Not really using the medians anymore, but I've kept the code just in case.
	foreach variable in `r(varlist)' {
		if "`variable'" == "aw" {
			continue
			}
		local tempname1 = "mean_`variable' = `variable' "
		local tempname2 = "med_`variable' = `variable' "
		local meanlist = "`meanlist'"+"`tempname1'"
		local medianlist = "`medianlist'"+"`tempname2'"
		}
	
	preserve
	
	collapse (mean) `meanlist' (p50) med_all=all [aw = aw]
	xpose, varname clear
	rename _varname varname
	order varname
	replace v1 = round(v1)

	export excel using "$path3/balance_sheet.xlsx", sheet("scf_`sheetname'", replace)
end




******************************** SCF 2013 **************************************

use $path1/p13i6.dta, clear

* Analytical weight = X42001
*  Weights need to be divided by 5 to account for the implicates created by
*  imputation. See the 2013 SCF's codebook ("IMPUTATION" section) for details.
*  (I don't think this matters for the point estimates we're creating, but
*  but better safe than sorry.)

*A bit of initial cleaning
drop J*
qui describe, varlist
gen aw = X42001/5

*In the SCF, a value of -1 means that the account is empty/asset is worthless, etc. Zero means
*they don't own the asset. For our purposes, these are the same thing. Anything below -1 generally
*means that there is some issue with the variable, and it should be treated as missing.

foreach item in `r(varlist)' {
	qui replace `item' = 0 if `item' == -1
	qui replace `item' = . if `item' < -1 
	}

tempfile start
save "`start'", replace


********************************* 1. Assets ************************************

* Bond face value (not currently included): X3906 X3910 X3908 X7633 X7634 X6705
* Market value (included): X7635 X7617 X7636 X7638 X7639 X6706

* Charitable trust: X7661 (not sure if this should be counted... probably not?)

*Small, necessary cleaning
foreach i in 0 1 3 4 {
	replace X11`i'32 = X11`i'32+X11`i'27 if X11`i'33 == 1
	}

gen double currency = 0
*The occasional loops account for the "catch-all" asset questions, which sometimes overlap
* with our categories.
forvalues i = 20(4)28 {
	local j = `i'+2
	replace currency = currency + X40`j' if X40`i' == 63
	replace X40`j' = 0 if X40`i' == 63
	}

gen double checking = X3506 + X3510 + X3514 + X3518 + X3522 + X3526 + X3529
gen double savings = X3730 + X3736 + X3742 + X3748 + X3754 + X3760 + X3765
gen double other_acct = 0
forvalues i = 20(4)28 {
	local j = `i'+2
	replace other_acct = other_acct + X40`j' if inlist(X40`i', 82, 84)
	replace X40`j' = 0 if inlist(X40`i', 82, 84)
	}

gen double cd = X3721
gen double mutualfund	= X3822 + X3824 + X3826 + X3828 + X3830 + X6704 + X3930 + X7787
gen double stock = X3915 + X3922 + X7641
forvalues i = 20(4)28 {
	local j = `i'+2
	replace stock = stock + X40`j' if inlist(X40`i', 74)
	replace X40`j' = 0 if inlist(X40`i', 74)
	}


gen double bond	= X7637 + X7636 + X6706 + X7635 + X7639 + X3902 
gen double ira 	= X6551 + X6559 + X6567 + X6552 + X6560 + X6568 + X6553 + X6561 +	X6569 + X6554 + X6562 + X6570 + X6756 + X6757 + X6758

replace X6997 = 0 if X7360 == 1 | X7360 == 2
gen double pension = X5604 + X5612 + X5620 + X5628 + X6462 + X6467 + X6472 + X6477 + X11032 + X11132 + X11332 + X11432 + X11259 + X11559
gen double retire = ira + pension
drop ira pension

* Current cash value of all payments made so far; for face value see X4005
gen double insur_life	= X4006
* Annuties that can be cashed in
gen double annuity = X6577
gen double trust = X6587 
gen double loan_out = X1310 + X1329 + X1339 + X3124 + X3224 + X4018 
forvalues i = 20(4)28 {
	local j = `i'+2
	replace loan_out = loan_out + X40`j' if inlist(X40`i', 61)
	replace X40`j' = 0 if inlist(X40`i', 61)
	}

gen double vehic 	= X8166 + X8167 + X8168 + X8188 + X2422 + X2506 + X2606 + X2623
/// + X2206 + X2306 + X2406 + X7155
gen double other_fin_asset = 0
forvalues i = 20(4)28 {
	local j = `i'+2
	replace loan_out = loan_out + X40`j' if inlist(X40`i', 62, 67, 72, 81, 83)
	replace X40`j' = 0 if inlist(X40`i', 62, 67, 72, 81, 83)
	}
* Some division necessary to account for ownerships shares.
gen double prim_housing = X716 + X604 + X614 + X623
gen double other_housing =  X2002 + X2012+  X1706*(X1705/10000) + X1806*(X1805/10000)
gen double business = X3129 + X3229 + X3408 + X3412 + X3452 + X3416 + X3420 + X3428 +  X3335 + X513 + X526

* If the above was done right, there is no double-counting here
gen double other_nonfin_asset = X4022 + X4026 + X4030 


output_all " - aw" "assets"

*********************************** 2. Debts ***********************************

use `start', clear

egen double creditcard = rowtotal(X413 X421 X427 X430)
gen double other_revolving 	= X7575
gen double mortgage_prim = X805 
gen double mortgage_other = X905 + X1005 + X1035 + X1318 + X1337 + X1342 + X1715 + X1815 + X2006 + X2016 + X1044
gen double heloc 	= X1108 + X1109 + X1130 + X1136 + X1005
gen double loan_improve = X1215 + X1219 
gen double loan_vehic 	= X2218 + X2318 + X2418 + X7169 + X2424 + X2519 + X2619 + X2625
gen double loan_educ 	= X7824 + X7847 + X7870 + X7924 + X7947 + X7970 + X7179 + X7180
gen double loan_busi 	= X3126 + X3226 + X3121 + X3221
gen double loan_margin = X3932	
gen double loan_pen 	= X11027 + X11127 + X11327 + X11427
gen double loan_other 	= X2723 + X2740 + X2823 + X2840 + X2923 + X2940 + X7183 + X4010 + X4032 


output_all " - aw" "liabilities"


********************************* 3. Income ************************************

use `start', clear

*Total income: X5729 

*Gift/Inheritance (annual): X5804 X5809 X5814 if X5805 X5810 X5815 == 2012

*Support from ex (+1 for freq): X5926 X6650 

*Annual
gen double tot_income_reported = X5729

*Annual
gen double wageinc 	= X5702
*Annual
egen double busifarminc = rowtotal(X5704 X5714)
*Annual
gen double intinc 	= X5708 + X5706
*Annual
gen double divinc 	= X5710
*Annual
gen double cginc 	= X5712

*Annual
gen double govtran = X5720 + X5716 + X5722
*Annual
gen double childsupport_alimony = X5718


*Pension withdrawals -- done by frequency
* Don't need these, because they're just asset transfers. I'm keeping this variable list
* for interest.
*createvar X6558 X6566 X6574 X6474 X6479 X6965 X6971 X6977 X6983, gen(pension_withdrawals)

*Annual
*Need to check that these all belong in the "other" category. Some of them may go in some of the other categories above.
gen double otherinc = X5724
gen double int_div_cg_inc 	= intinc + divinc + cginc
drop intinc divinc cginc


** ACCORDING TO CODE BOOK, TOTAL INCOME SHOULD BE X5702+X5704+X5706+X5708+X5710+X5712+X5714+X5716+X5718+X5720+X5722+X5724 by the IRS definition.
** Also note that some of these variables are actually net of costs, but none of them are net of taxes.

*Output
output_all "- aw - tot_income_reported" "income" yes



*********** 4. Expenditures ***********

use `start', clear


*Food
createvar X3024 X3027 X3029, gen(food)

*Rent
createvar X521 X602 X708 X612 X619 X703, gen(rent)

*Mortgage
createvar X808 X908 X1008 X813 X913 X1013 X1040 X2007 X2017 X1718 X1818 X1723 X1823, gen(mortgage)
gen Xmortgage_extra = X1039
convert Xmortgage_extra "X7567"
replace mortgage = mortgage+Xmortgage_extra

gen housing = rent+mortgage
drop rent mortgage

*Transportation
*Annoyingly, some of the variable codings here do not follow the standard format...
createvar X2105 X7162 X2626 X2214 X2314 X2414 X7164 X2425 X2515 X2615, gen(transportation)
gen Xtrans7 = X2213
replace Xtrans7 = 0 if Xtrans7 == -1
replace Xtrans7 = . if Xtrans7 < -1
convert Xtrans7 X7537

gen Xtrans8 = X2313
replace Xtrans8 = 0 if Xtrans8 == -1
replace Xtrans8 = . if Xtrans8 < -1
convert Xtrans8 X7536

gen Xtrans9 = X2413
replace Xtrans9 = 0 if Xtrans9 == -1
replace Xtrans9 = . if Xtrans9 < -1
convert Xtrans9 X7535

gen Xtrans10 = X2514
replace Xtrans10 = 0 if Xtrans10 == -1
replace Xtrans10 = . if Xtrans10 < -1
convert Xtrans10 X7531

gen Xtrans11 = X2614
replace Xtrans11 = 0 if Xtrans11 == -1
replace Xtrans11 = . if Xtrans11 < -1
convert Xtrans11 X7530

egen Xtrans_total = rowtotal(Xtrans*)
replace transportation = transportation + Xtrans_total

*Education
local i = 0
createvar X7815 X7838 X7861 X7915 X7938 X7961 X7817 X7840 X7863 X7917 X7940 X7963 X7180, gen(education)

*Other consumer loans
*More annoying nonstandard variable codings...
local i = 0
local freq_vars "X7527 X7526 X7525 X7524 X7523 X7522 X7185"
foreach variable in X2718 X2735 X2818 X2835 X2918 X2935 X7184 {
	gen Xconsloan`i' = `variable'
	replace Xconsloan`i' = 0 if Xconsloan`i' == -1
	replace Xconsloan`i' = . if Xconsloan`i' < -1
	local j = `i'+1
	local freq_var : word `j' of `freq_vars'
	convert Xconsloan`i' `freq_var'
	local ++i
	}
egen double consloan = rowtotal(Xconsloan*)

*Life insurance
createvar X4011 X4014, gen(lifeins)

*Pensions
createvar X11042 X11142 X11342 X11442 X11045 X11145 X11345 X11445, gen(pension)

*Loan against pension
createvar X11028 X11128 X11328 X11428, gen(pension_loan)

*Heloc
createvar X1109 X1120 X1131, gen(heloc)

*Home improvement loans
createvar X1211 X1220, gen(homeimprovement_loan)
convert X1210 X7565
replace homeimprovement_loan = homeimprovement_loan + X1210

gen insur_finserv_pens = lifeins + pension_loan + pension + heloc + homeimprovement_loan + consloan
drop lifeins pension_loan pension heloc homeimprovement_loan consloan

*Real estate taxes
gen double tax_realestate = X721
convert tax_realestate X722

*The codebook says that X5928 has an associated frequency variable X5929, but that variable isn't in the
*dataset for some reason. Same with X6652/X6653
gen double misc = X5734 + X5732 + X5928 + X6652

*Charitable contributions
gen charity = X5823




output_all "- aw" "expenditure"



****** Additional cash flow stuff

*Only two variables so far... seems sortof silly to create a whole new sheet. Oh well.
use `start', clear

egen ira_withdrawals = rowtotal(X6558 X6566 X6574)
*createvar X11045 X11145 X11345 X11445, gen(pension_contributions)

output_all "- aw" "cash_flow"


cap log close
